################################################################################
# SIMDATA, this is the function that generates the data!!!
# This came from Therneau, comments and sample objects by SDB May/June 2002
################################################################################
# nn gives the sample size, maxev gives the maximum number of events per subject,
#maxt gives the maximum follow up time, p gives the proportion of subjects that
#receive treatment, rxe is the treatment effect (beta), covar is a function to
#generate the random effect (heterogeneity, comes from a uniform distribution,
#see covar_function.ssc), times gives the time to next event (is either rexptimes.byk
#plusdepends, or exptimes, for example), rho is autocorrelation parameter (but
#is only used in rexptimes.byk as such, set to 1.0 for independence, min and maxi
#give the range of the uniform distribution for the draws in covar.
	
simdata <- function(nn, maxev, maxt, p, rxe, base, covar, times, rho=1, min=-2, maxi=2) {
#	set.seed(1)
    n    <- floor(nn/2)				#floor(x) contains the largest integers <= x (here nn/2). 
    xx   <- covar(nn,min,maxi)		#This says give the function covar nn as the 
									#number of covariates to return.
#xx:
#[1] -1.3487696 -0.3009412 -0.7337485  0.5858178 -1.6648505 -1.6693917 -1.1877509  1.9118737 -0.2451871 -0.9120608
									
    rx   <- c(rep(1,n), rep(0,n))		#This combines into a row a set of 1s 
											#repeated n times and a set of 0s repeated 
											#n times.  NOT SURE WHY.
#rx:
#[1] 1 1 1 1 1 0 0 0 0 0								

    risk <- xx							#Risk is the vector of random effects created by covar
											#(the uniformly distributed random variable)
#risk:
#[1] -1.3487696 -0.3009412 -0.7337485  0.5858178 -1.6648505 -1.6693917 -1.1877509  1.9118737 -0.2451871 -0.9120608

											
										
    risk[1:n] <- risk[1:n] + rep(rxe, round(p*n))	#Replace risk for rows 1 to n with 
															#what's there (random effect) plus
															#rxe repeated for p*n, this adds 
															#the treatment effect (rxe) to 
															#the risk for the portion of the 
															#sample (p*n) that receives treatment.
#risk:
#[1] -2.3487696 -1.3009412 -1.7337485 -0.4141822 -2.6648505 -1.6693917 -1.1877509  1.9118737 -0.2451871 -0.9120608


    rxgrp <- c(rep(1:length(p), round(p*n)), rep(0,n))	#combine into a row: repeat 1 to 
																# # of p-rows p*n times
																#combined with 0 repeated n times
#rxgrp:
#[1] 1 1 1 1 1 0 0 0 0 0

	gapstart<-rep(0,nn)

    risk <- exp(risk) * base			#risk is replaced with exp of risk * baseline risk

#risk:
#[1] 0.09548658 0.27227540 0.17662110 0.66088053 0.06960976 0.18836162 0.30490627 6.76575414 0.78255814 0.40169554

##########################################################################################
# Since the function was passed exptimes for the argument times, times IS exptimes. For an
# explanation of the function exptimes (times) see below.
# times = function to return a matrix of inter-event times
# times(risk, k), where risk[1:n] is the relative hazard of a subject
###########################################################################################

    temp <- times(risk,maxev,rho)			#The function times needed by simdata function is 
											#passed the function exptimes
											#in the call to simdata and so times==exptimes
#temp:
#numeric matrix: 10 rows, 6 columns. 
#           [,1]       [,2]      [,3]      [,4]       [,5]         [,6] 
#[1,]  0.3371361  6.7539995 0.9556816  5.122449 13.5268065 13.606789456
#[2,]  0.8753686  0.5676004 0.2992863  3.328514  3.2698339  0.006568544
#[3,] 21.9678298  4.0214176 0.8315382  8.481424  2.7373305 16.308966673
#[4,]  0.1460226  1.5973235 2.3641998  2.865536  0.8511547  5.007198953
#[5,]  1.4435692 30.7964420 8.6353346  8.334783 97.9373983 29.609612090
#[6,]  3.0869505  8.1448012 3.9846210 14.806031  2.8327831 18.215745656
#            [,1]       [,2]      [,3]       [,4]      [,5]        [,6] 
#[7,] 3.23335777 1.83174331 0.8471382 7.59095895 3.7106107 4.097390580
#[8,] 0.03331137 0.03166055 0.3970570 0.00955944 0.1658431 0.004769739
#[9,] 1.21866204 0.19399182 3.7823740 1.67419288 1.3959125 7.364848660
#[10,] 0.49929317 0.11573545 2.3834150 1.18950664 1.9087917 0.357510353



    temp2<- apply(temp,1,cumsum)      #stop times: cumsum rows of temp -- compute 
											#cumulative sum of rows, gives total time
											#up through that row (so cumulates times up 
											#to the rowth event

#temp2:
#numeric matrix: 6 rows, 10 columns. 
#           [,1]      [,2]     [,3]       [,4]       [,5]     [,6]      [,7]       [,8]      [,9]     [,10] 
#[1,]  0.3371361 0.8753686 21.96783  0.1460226   1.443569  3.08695  3.233358 0.03331137  1.218662 0.4992932
#[2,]  7.0911356 1.4429691 25.98925  1.7433460  32.240011 11.23175  5.065101 0.06497192  1.412654 0.6150286
#[3,]  8.0468172 1.7422554 26.82079  4.1075459  40.875346 15.21637  5.912239 0.46202892  5.195028 2.9984436
#[4,] 13.1692664 5.0707695 35.30221  6.9730821  49.210129 30.02240 13.503198 0.47158836  6.869221 4.1879503
#[5,] 26.6960730 8.3406034 38.03954  7.8242368 147.147527 32.85519 17.213809 0.63743144  8.265133 6.0967420
#[6,] 40.3028624 8.3471720 54.34851 12.8314358 176.757140 51.07093 21.311200 0.64220118 15.629982 6.4542524



    temp3<- rbind(0, temp2[-maxev,])  #start times: This puts 0 as first start time in 
											#all the rows and gets rid of row with the 
											#cum time to last event
#temp3:
#numeric matrix: 6 rows, 10 columns. 
#           [,1]      [,2]     [,3]      [,4]       [,5]     [,6]      [,7]       [,8]     [,9]     [,10] 
#[1,]  0.0000000 0.0000000  0.00000 0.0000000   0.000000  0.00000  0.000000 0.00000000 0.000000 0.0000000
#[2,]  0.3371361 0.8753686 21.96783 0.1460226   1.443569  3.08695  3.233358 0.03331137 1.218662 0.4992932
#[3,]  7.0911356 1.4429691 25.98925 1.7433460  32.240011 11.23175  5.065101 0.06497192 1.412654 0.6150286
#[4,]  8.0468172 1.7422554 26.82079 4.1075459  40.875346 15.21637  5.912239 0.46202892 5.195028 2.9984436
#[5,] 13.1692664 5.0707695 35.30221 6.9730821  49.210129 30.02240 13.503198 0.47158836 6.869221 4.1879503
#[6,] 26.6960730 8.3406034 38.03954 7.8242368 147.147527 32.85519 17.213809 0.63743144 8.265133 6.0967420


    keep <- (temp3 <maxt)				#keep is true or false, true if temp3 is <maxt, 
											#maxt is set to 4 in Therneau's sample 
											
#keep:
#     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] 
#[1,]    T    T    T    T    T    T    T    T    T     T
#[2,]    T    T    F    T    T    T    T    T    T     T
#[3,]    F    T    F    T    F    F    F    T    T     T
#[4,]    F    T    F    F    F    F    F    T    F     T
#[5,]    F    F    F    F    F    F    F    T    F     F
#[6,]    F    F    F    F    F    F    F    T    F     F


    stat <- 1*(temp2 <maxt)				#stat is 1 if temp2 cell is < maxt, otherwise 0 
											#(so it is 1, row fewer than temp3 is true
											#This must help get start and stop times right.
#stat:
#     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] 
#[1,]    1    1    0    1    1    1    1    1    1     1
#[2,]    0    1    0    1    0    0    0    1    1     1
#[3,]    0    1    0    0    0    0    0    1    0     1
#[4,]    0    0    0    0    0    0    0    1    0     0
#[5,]    0    0    0    0    0    0    0    1    0     0
#[6,]    0    0    0    0    0    0    0    1    0     0


    temp2 <- ifelse(temp2<maxt, temp2, maxt)		#This changes temp2 so that it is temp2 
														#if cell is less than maxt and id maxt otherwise

#(newtemp2)
#          [,1]      [,2] [,3]      [,4]     [,5]    [,6]     [,7]       [,8]     [,9]     [,10] 
#[1,] 0.3371361 0.8753686    4 0.1460226 1.443569 3.08695 3.233358 0.03331137 1.218662 0.4992932
#[2,] 4.0000000 1.4429691    4 1.7433460 4.000000 4.00000 4.000000 0.06497192 1.412654 0.6150286
#[3,] 4.0000000 1.7422554    4 4.0000000 4.000000 4.00000 4.000000 0.46202892 4.000000 2.9984436
#[4,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.47158836 4.000000 4.0000000
#[5,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.63743144 4.000000 4.0000000
#[6,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.64220118 4.000000 4.0000000

#td creates a data.frame containing id, start, stop, status,rx,x,enum, and rxgrp
#The id column contains: repeat maxev nn-times, for that number of times, 
#repeat 1:nn, I don't know what the [keep] is. 
#The start column: combines temp3 if keep is true?
#stop combines temp2 if keep is true?
#status ???
#rx repeats maxev nn-times, for that number of times repeat rx 
#(which is 0s and 1s)?, again, don't know what the [keep] is.
#x same thing, except xx is repeated rather than rx.
#enum repeats 1 to maxev, nn times (plus [keep] thing).
#rxgrp repeats rxgrp for the repeats of maxev nn-times, (plus [keep] thing).

#I added these 3 lines to Therneau's program to derive stop times in gap time.
start2<- c(temp3[keep])  #takes values in temp3 (By column) if keep is true and calls it start
stop2<- c(temp2[keep])  #takes values in temp2 (by column) if keep is true and calls it stop
gapstop<-stop2-start2
#return(gapstop)

#first create enum0 - enum-1
#then if stop is maxt then strata is enum-2, unless enum is 0, in which case, keep it 0

enum0<-(rep(1:maxev-1,nn))[keep]
strata<-ifelse(enum0==0,0,ifelse(stop2<maxt,enum0,ifelse(stop2==maxt & enum0==1,enum0,enum0-1)))
#looky<-cbind(start2,stop2,gapstop)

#return(temp2,temp3,keep,gapstop,enum0,strata,looky)

    td <- data.frame(id    = (rep(1:nn,rep(maxev,nn)))[keep],
               start = c(temp3[keep]),  #takes values in temp3 (By column) if keep is true and calls it start
               stop  = c(temp2[keep]),  #takes values in temp2 (by column) if keep is true and calls it stop
               status = stat[keep],
               rx    = (rep(rx, rep(maxev,nn)))[keep],
               x     = (rep(xx, rep(maxev,nn)))[keep],	#this is the random effect
               enum  = (rep(1:maxev, nn))[keep],
				 rxgrp = (rep(rxgrp, rep(maxev,nn)))[keep],
				 gapstart = (rep(gapstart, rep(maxev,nn)))[keep],
				 gapstop = c(gapstop),
				 strata = c(strata))
				
				
    # add interaction terms to the data.frame td
    for (i in 1:maxev)												#for i = 1 to maxev
        td[[paste('rx', i, sep='')]] <- td$rx * (td$enum ==i)	#pastes char rx to i, (without a space) and
																		#put in this column whatever is in rx*true(1), if enum is equal to i
																		#so this is 1 if they had event i

    if (maxev > 2) {													#if case had more than 2 events,
        for (i in 2:(maxev-1))										#then for i from 2 to maxev-1
            td[[paste('rx',i,'p', sep='')]] <- td$rx * (td$enum >=i)		#pastes rx to i to p, no spaces, and puts in this column
																					#rx*true(1) if enum is greater than or equal to i
        }
    td
#write.table(td, "td.txt", sep = ",",T) 
#export.data(td, "td.txt",STATA) 
    }

#td
#   id      start       stop status rx          x enum rxgrp rx1 rx2 rx3 rx4 rx5 rx6 rx2p rx3p rx4p rx5p 
# 1  1 0.00000000 0.33713614      1  1 -1.3487696    1     1   1   0   0   0   0   0    0    0    0    0
# 2  1 0.33713614 4.00000000      0  1 -1.3487696    2     1   0   1   0   0   0   0    1    0    0    0
# 3  2 0.00000000 0.87536864      1  1 -0.3009412    1     1   1   0   0   0   0   0    0    0    0    0
# 4  2 0.87536864 1.44296908      1  1 -0.3009412    2     1   0   1   0   0   0   0    1    0    0    0
# 5  2 1.44296908 1.74225538      1  1 -0.3009412    3     1   0   0   1   0   0   0    1    1    0    0
# 6  2 1.74225538 4.00000000      0  1 -0.3009412    4     1   0   0   0   1   0   0    1    1    1    0
# 7  3 0.00000000 4.00000000      0  1 -0.7337485    1     1   1   0   0   0   0   0    0    0    0    0
# 8  4 0.00000000 0.14602255      1  1  0.5858178    1     1   1   0   0   0   0   0    0    0    0    0
# 9  4 0.14602255 1.74334603      1  1  0.5858178    2     1   0   1   0   0   0   0    1    0    0    0
#10  4 1.74334603 4.00000000      0  1  0.5858178    3     1   0   0   1   0   0   0    1    1    0    0
#11  5 0.00000000 1.44356919      1  1 -1.6648505    1     1   1   0   0   0   0   0    0    0    0    0
#12  5 1.44356919 4.00000000      0  1 -1.6648505    2     1   0   1   0   0   0   0    1    0    0    0
#13  6 0.00000000 3.08695046      1  0 -1.6693917    1     0   0   0   0   0   0   0    0    0    0    0
#14  6 3.08695046 4.00000000      0  0 -1.6693917    2     0   0   0   0   0   0   0    0    0    0    0
#15  7 0.00000000 3.23335777      1  0 -1.1877509    1     0   0   0   0   0   0   0    0    0    0    0
#16  7 3.23335777 4.00000000      0  0 -1.1877509    2     0   0   0   0   0   0   0    0    0    0    0
#   id      start       stop status rx          x enum rxgrp rx1 rx2 rx3 rx4 rx5 rx6 rx2p rx3p rx4p rx5p 
#17  8 0.00000000 0.03331137      1  0  1.9118737    1     0   0   0   0   0   0   0    0    0    0    0
#18  8 0.03331137 0.06497192      1  0  1.9118737    2     0   0   0   0   0   0   0    0    0    0    0
#19  8 0.06497192 0.46202892      1  0  1.9118737    3     0   0   0   0   0   0   0    0    0    0    0
#20  8 0.46202892 0.47158836      1  0  1.9118737    4     0   0   0   0   0   0   0    0    0    0    0
#21  8 0.47158836 0.63743144      1  0  1.9118737    5     0   0   0   0   0   0   0    0    0    0    0
#22  8 0.63743144 0.64220118      1  0  1.9118737    6     0   0   0   0   0   0   0    0    0    0    0
#23  9 0.00000000 1.21866204      1  0 -0.2451871    1     0   0   0   0   0   0   0    0    0    0    0
#24  9 1.21866204 1.41265386      1  0 -0.2451871    2     0   0   0   0   0   0   0    0    0    0    0
#25  9 1.41265386 4.00000000      0  0 -0.2451871    3     0   0   0   0   0   0   0    0    0    0    0
#26 10 0.00000000 0.49929317      1  0 -0.9120608    1     0   0   0   0   0   0   0    0    0    0    0
#27 10 0.49929317 0.61502862      1  0 -0.9120608    2     0   0   0   0   0   0   0    0    0    0    0
#28 10 0.61502862 2.99844363      1  0 -0.9120608    3     0   0   0   0   0   0   0    0    0    0    0
#29 10 2.99844363 4.00000000      0  0 -0.9120608    4     0   0   0   0   0   0   0    0    0    0    0


#out2<-simdata(nn, 11, 50, 1, -1, .1, covar, klambda,1,-.01,.01)
#out2

